# coding=utf-8
 
import json
from urllib import parse

import requests
from lxml import etree
import httpx


# Share one Session across requests so cookies set by the server are kept
# automatically instead of being stored by hand.
session = requests.Session()

# Search keyword.
keyword = '湖州'

# Query parameters of the search page. They are encoded with urlencode so a
# keyword containing '&', '#', '+', spaces or non-ASCII text cannot break or
# alter the query string.
search_params = {
    'keyword': keyword,
    'pd': 'information',
    'source': 'input',
    'dvpf': 'pc',
    'aid': '4916',
    'page_num': '0',
}

# Full search URL.
toutiao_url = f"https://so.toutiao.com/search?{parse.urlencode(search_params)}"

# Request headers copied from a desktop Firefox session.
headers = {
    'Host': 'so.toutiao.com',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:92.0) Gecko/20100101 Firefox/92.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
    'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
    # 'br' is intentionally not advertised: requests only decodes brotli when
    # an optional brotli package is installed, and otherwise the response
    # text comes back as undecoded (garbled) bytes.
    'Accept-Encoding': 'gzip, deflate',
    'Connection': 'keep-alive',
    # NOTE(review): hard-coded browser cookie; presumably it has to be
    # refreshed by hand once the site stops accepting it.
    'Cookie': 'ttwid=1%7ChveX3IaM1rCFpgcCqVvRa9-PB1KQY54JgNHZ571C2ig%7C1629966545%7C00f5845d6325f531c8146174f78c8e19cb6e330c82af9703043aa7def3ad90eb; _S_WIN_WH=1257_638; _S_DPR=2; _S_IPAD=0; MONITOR_WEB_ID=7000652996464199198; tt_webid=7000655267407087112; WIN_WH=1257_638; PIXIEL_RATIO=2; FRM=new; s_v_web_id=verify_a3b8adf22ef22eb950fb9aac4f01f5f7; _signature=_02B4Z6wo00f01yX5w3AAAICD0s56Lz8NzWMl2cfAAKhjb7; ttcid=28f70453bb7643c39dfabd813658acae41; tt_scid=hXNLNhjKaDvDm0iee5wNIM71mAHSYcsaKuK7VlGCpKeJaov1byFFYGXBt6OtB1vob954',
    'Upgrade-Insecure-Requests': '1',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-User': '?1',
    'Cache-Control': 'max-age=0, no-cache',
    'Pragma': 'no-cache',
}


from requests.packages import urllib3

# Silence the warning that is emitted because the request below is sent
# with verify=False.
urllib3.disable_warnings()

# Fetch the search results page.
# NOTE(review): verify=False disables TLS certificate verification; it is
# kept as in the original script - confirm it is really required.
# The timeout prevents the script from blocking forever when the server
# does not answer.
response_body = session.get(toutiao_url, headers=headers, verify=False, timeout=10)

# NOTE: despite its name, this variable holds the numeric HTTP status code.
raise_for_status = response_body.status_code
print(raise_for_status)

# Stop on 4xx/5xx responses instead of parsing an error page as if it were
# a results page.
response_body.raise_for_status()

# Prints the Response object's repr, not the page body.
# Original author's note: the body text came out garbled.
# NOTE(review): a likely cause is a brotli-compressed reply - requests only
# decodes `br` when a brotli package is installed - so check whether the
# Accept-Encoding request header advertises `br`.
print(response_body)

selector = etree.HTML(response_body.text)
# This XPath collects every detail-page link on the current results page.
# etree.HTML may yield None when nothing could be parsed (e.g. an empty
# body); fall back to an empty list rather than failing on None.xpath.
if selector is None:
    div_content_list = []
else:
    div_content_list = selector.xpath('//a[@class="text-ellipsis text-underline-hover"]/@href')
print(div_content_list)



























